{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 赛题链接 https://www.kaggle.com/c/LANL-Earthquake-Prediction/overview"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-05T10:47:03.298022Z",
     "start_time": "2019-06-05T10:46:42.821485Z"
    },
    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
   },
   "outputs": [],
   "source": [
    "import time\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import xgboost as xgb\n",
    "import lightgbm as lgb\n",
    "from catboost import CatBoostRegressor\n",
    "\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.model_selection import GridSearchCV,KFold\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.ensemble import GradientBoostingRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2019-06-05T06:44:41.145Z"
    },
    "_uuid": "c86a043a2dcd76b6a307f366f9725762c83db68a"
   },
   "outputs": [],
   "source": [
    "##这里数据读取需要将近2分钟\n",
    "%%time\n",
    "train = pd.read_csv('/kaggle/input/LANL-Earthquake-Prediction/train.csv', dtype={'acoustic_data': np.int16, 'time_to_failure': np.float64})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "76bc1035fec341ed522739e02c477b10e6f766cf"
   },
   "outputs": [],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "247b97a1d70e15567afd7b8faec1bd5b74b6186c"
   },
   "outputs": [],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "279bbb1b6c0e871eab7d73f53c30da122d34e59c"
   },
   "outputs": [],
   "source": [
    "#通过线性的方式构造特征 arr:每段信号数据\n",
    "def add_trend_feature(arr, abs_values=False):\n",
    "    idx = np.array(range(len(arr)))\n",
    "    if abs_values:\n",
    "        arr = np.abs(arr)\n",
    "    lr = LinearRegression()\n",
    "    lr.fit(idx.reshape(-1, 1), arr)\n",
    "    return lr.coef_[0]\n",
    "\n",
    "# 通过微震信号自动检测的STA/LTA算法构造特征\n",
    "def classic_sta_lta(x, length_sta, length_lta):\n",
    "    sta = np.cumsum(x ** 2)\n",
    "    # 变为 float\n",
    "    sta = np.require(sta, dtype=np.float)\n",
    "\n",
    "    lta = sta.copy()\n",
    "    # 计算 STA 和 LTA\n",
    "    sta[length_sta:] = sta[length_sta:] - sta[:-length_sta]\n",
    "    sta /= length_sta\n",
    "    lta[length_lta:] = lta[length_lta:] - lta[:-length_lta]\n",
    "    lta /= length_lta\n",
    "    \n",
    "    sta[:length_lta - 1] = 0\n",
    "    # 通过将零值设置为极小的浮点数来避免除以零\n",
    "    dtiny = np.finfo(0.0).tiny\n",
    "    idx = lta < dtiny\n",
    "    lta[idx] = dtiny\n",
    "    return sta / lta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "b43f03d2277cd92ff9f00bc4d79971738789ad71"
   },
   "outputs": [],
   "source": [
    "rows = 150000 #每次取信号长度\n",
    "segments = int(np.floor(train.shape[0] / rows))\n",
    "\n",
    "X = pd.DataFrame(index=range(segments), dtype=np.float64)\n",
    "Y = pd.DataFrame(index=range(segments), dtype=np.float64,\n",
    "                       columns=['time_to_failure'])\n",
    "\n",
    "#构建特征\n",
    "for segment in range(segments):\n",
    "    seg = train.iloc[segment*rows:segment*rows+rows]\n",
    "    x = seg['acoustic_data'].values\n",
    "    x1 = seg['acoustic_data']\n",
    "    zc = np.fft.fft(x)\n",
    "    y = seg['time_to_failure'].values[-1]\n",
    "    \n",
    "    Y.loc[segment, 'time_to_failure'] = y\n",
    "    \n",
    "    realFFT = np.real(zc)\n",
    "    imagFFT = np.imag(zc)\n",
    "    X.loc[segment, 'Rmean'] = realFFT.mean()\n",
    "    X.loc[segment, 'Rstd'] = realFFT.std()\n",
    "    X.loc[segment, 'Rmax'] = realFFT.max()\n",
    "    X.loc[segment, 'Rmin'] = realFFT.min()\n",
    "    X.loc[segment, 'Imean'] = imagFFT.mean()\n",
    "    X.loc[segment, 'Istd'] = imagFFT.std()\n",
    "    X.loc[segment, 'Imax'] = imagFFT.max()\n",
    "    X.loc[segment, 'Imin'] = imagFFT.min()\n",
    "    X.loc[segment, 'Rmean_last_5000'] = realFFT[-5000:].mean()\n",
    "    X.loc[segment, 'Rstd__last_5000'] = realFFT[-5000:].std()\n",
    "    X.loc[segment, 'Rmax_last_5000'] = realFFT[-5000:].max()\n",
    "    X.loc[segment, 'Rmin_last_5000'] = realFFT[-5000:].min()\n",
    "    X.loc[segment, 'Rmean_last_15000'] = realFFT[-15000:].mean()\n",
    "    X.loc[segment, 'Rstd_last_15000'] = realFFT[-15000:].std()\n",
    "    X.loc[segment, 'Rmax_last_15000'] = realFFT[-15000:].max()\n",
    "    X.loc[segment, 'Rmin_last_15000'] = realFFT[-15000:].min()\n",
    "    \n",
    "    \n",
    "    X.loc[segment, 'mean'] = x.mean()\n",
    "    X.loc[segment, 'stdev'] = x.std()\n",
    "    X.loc[segment, 'variance'] = np.var(x)\n",
    "    X.loc[segment, 'max'] = x.max()\n",
    "    X.loc[segment, 'min'] = x.min()\n",
    "    X.loc[segment, 'max-min-diff'] = x.max()-x.min()\n",
    "    X.loc[segment, 'max-mean-diff'] = x.max()-x.mean()\n",
    "    X.loc[segment, 'mean-change-abs'] = np.mean(np.diff(x))\n",
    "    X.loc[segment, 'abs-min'] = np.abs(x).min()\n",
    "    X.loc[segment, 'abs-max'] = np.abs(x).max()\n",
    "    X.loc[segment, 'abs_mean'] = np.abs(x).mean()\n",
    "    X.loc[segment, 'abs_std'] = np.abs(x).std()\n",
    "    X.loc[segment, 'std-first-50000'] = x[:50000].std()\n",
    "    X.loc[segment, 'std-last-50000'] = x[-50000:].std()\n",
    "    X.loc[segment, 'mean-first-50000'] = x[:50000].min()\n",
    "    X.loc[segment, 'mean-last-50000'] = x[-50000:].mean()\n",
    "    X.loc[segment, 'max-first-50000'] = x[:50000].max()\n",
    "    X.loc[segment, 'max-last-50000'] = x[-50000:].max()\n",
    "    X.loc[segment, 'min-first-50000'] = x[:50000].min()\n",
    "    X.loc[segment, 'min-last-50000'] = x[-50000:].min()\n",
    "    X.loc[segment, 'q01'] = np.quantile(x, 0.01)\n",
    "    X.loc[segment, 'q05'] = np.quantile(x, 0.05)\n",
    "    X.loc[segment, 'q10'] = np.quantile(x, 0.10)\n",
    "    X.loc[segment, 'q95'] = np.quantile(x, 0.95)\n",
    "    X.loc[segment, 'q99'] = np.quantile(x, 0.99)\n",
    "    \n",
    "    X.loc[segment, 'trend'] = add_trend_feature(x)\n",
    "    X.loc[segment, 'abs_trend'] = add_trend_feature(x, abs_values=True)\n",
    "    \n",
    "    X.loc[segment, 'classic_sta_lta1_mean'] = classic_sta_lta(x, 500, 10000).mean()\n",
    "    X.loc[segment, 'classic_sta_lta2_mean'] = classic_sta_lta(x, 5000, 100000).mean()\n",
    "    X.loc[segment, 'classic_sta_lta3_mean'] = classic_sta_lta(x, 3333, 6666).mean()\n",
    "    X.loc[segment, 'classic_sta_lta4_mean'] = classic_sta_lta(x, 10000, 25000).mean()\n",
    "    #窗口滑动 rolling()\n",
    "    for w in [10, 50, 100, 1000]:\n",
    "        x_roll_std = x1.rolling(w).std().dropna().values\n",
    "        x_roll_mean = x1.rolling(w).mean().dropna().values\n",
    "        x_roll_abs_mean = x1.abs().rolling(w).mean().dropna().values\n",
    "        \n",
    "        X.loc[segment, 'ave_roll_std_' + str(w)] = x_roll_std.mean()\n",
    "        X.loc[segment, 'std_roll_std_' + str(w)] = x_roll_std.std()\n",
    "        X.loc[segment, 'max_roll_std_' + str(w)] = x_roll_std.max()\n",
    "        X.loc[segment, 'min_roll_std_' + str(w)] = x_roll_std.min()\n",
    "        X.loc[segment, 'q01_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.01)\n",
    "        X.loc[segment, 'q05_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.05)\n",
    "        X.loc[segment, 'q10_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.10)\n",
    "        X.loc[segment, 'q95_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.95)\n",
    "        X.loc[segment, 'q99_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.99)\n",
    "        \n",
    "        X.loc[segment, 'ave_roll_mean_' + str(w)] = x_roll_mean.mean()\n",
    "        X.loc[segment, 'std_roll_mean_' + str(w)] = x_roll_mean.std()\n",
    "        X.loc[segment, 'max_roll_mean_' + str(w)] = x_roll_mean.max()\n",
    "        X.loc[segment, 'min_roll_mean_' + str(w)] = x_roll_mean.min()\n",
    "        X.loc[segment, 'q01_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.01)\n",
    "        X.loc[segment, 'q05_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.05)\n",
    "        X.loc[segment, 'q95_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.95)\n",
    "        X.loc[segment, 'q99_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.99)\n",
    "        \n",
    "        X.loc[segment, 'ave_roll_abs_mean_' + str(w)] = x_roll_abs_mean.mean()\n",
    "        X.loc[segment, 'std_roll_abs_mean_' + str(w)] = x_roll_abs_mean.std()\n",
    "        X.loc[segment, 'max_roll_abs_mean_' + str(w)] = x_roll_abs_mean.max()\n",
    "        X.loc[segment, 'min_roll_abs_mean_' + str(w)] = x_roll_abs_mean.min()\n",
    "        X.loc[segment, 'q01_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.01)\n",
    "        X.loc[segment, 'q05_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.05)\n",
    "        X.loc[segment, 'q95_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.95)\n",
    "        X.loc[segment, 'q99_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.99)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "9bae3281e9833ab6c72e6b377c1683ce0999bca2"
   },
   "outputs": [],
   "source": [
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "867d75708112efad74d90911e8c0da40755b6dee"
   },
   "outputs": [],
   "source": [
    "'''网格调参 函数\n",
    "model:模型\n",
    "parames:需调节的参数\n",
    "features:特征\n",
    "target:标签\n",
    "'''\n",
    "num_folds = 5\n",
    "def grid_search_cv(model, parames, features, target):\n",
    "    t0 = time.time()\n",
    "    reg = GridSearchCV(model, parames, cv=num_folds, scoring='neg_mean_absolute_error')\n",
    "    reg.fit(features, target)\n",
    "    \n",
    "    t0 = time.time() - t0\n",
    "    print(\"Best CV score: {:.4f}, time: {:.1f}s\".format(-reg.best_score_, t0))\n",
    "    print(reg.best_params_)\n",
    "    return reg.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "bebaa0cc0aea4f62c3ce8b57417d54c8d0e6d59f"
   },
   "outputs": [],
   "source": [
    "#标准化特征\n",
    "scaler = StandardScaler()\n",
    "scaler.fit(X)\n",
    "train_x = scaler.transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "eadaa44a1876a77417272c5fa1b0c369a371ddc3"
   },
   "outputs": [],
   "source": [
    "train_y = Y.values.flatten()\n",
    "train_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "cc54cb66e35e766f90e6dc846c60e08a131de64e"
   },
   "outputs": [],
   "source": [
    "#模型GBDT训练\n",
    "gbdt = GradientBoostingRegressor(learning_rate=0.05, min_samples_split=10,n_estimators=400,max_depth=3,max_features=11,\n",
    "                                 min_samples_leaf=200,subsample=0.3,random_state=10)\n",
    "gbdt.fit(train_x,train_y)\n",
    "# 网格调节参数\n",
    "# best_gbdt_params = grid_search_cv(gbdt, parame_gbdt, train_x, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "41afc605b2f050439ce21d085d2962b8184792f2"
   },
   "outputs": [],
   "source": [
    "#模型XGB训练\n",
    "# params_xgb = {'reg_alpha': [0.05, 0.1,0.25,0.5,0.75, 1, 2, 3], 'reg_lambda': [0.05, 0.1,0.25,0.5,0.75, 1, 2, 3]}\n",
    "xgb_fit = xgb.XGBRegressor(learning_rate=0.05,seed=0,n_estimators=200,max_depth=1,min_child_weight=7,gamma=0.1,\n",
    "                           subsample=0.2,colsample_bytree=0.75,reg_alpha=2,reg_lambda=2)\n",
    "# best_xgb_params = grid_search_cv(xgb_fit, params_xgb, train_x, train_y)\n",
    "xgb_fit.fit(train_x,train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "83118c24acd6ed94937d0d5fdc26f093abaf4288"
   },
   "outputs": [],
   "source": [
    "#模型LGB训练\n",
    "# params_lgb = {'reg_alpha': [0, 0.001, 0.01, 0.03, 0.08, 0.1,0.3, 0.5],'reg_lambda': [0, 0.001, 0.01, 0.03, 0.08, 0.1,0.3, 0.5,0.75]}\n",
    "lgb_fit =lgb.LGBMRegressor(objective='regression',n_estimators=200,learning_rate=0.05,max_depth=1,min_child_samples=20,reg_alpha=0.5,\n",
    "                           reg_lambda=0.75,min_child_weight=0.001,num_leaves=47,feature_fraction=0.9,bagging_fraction=0.6)\n",
    "# best_lgb_params = grid_search_cv(lgb_fit, params_lgb, train_x, train_y)\n",
    "lgb_fit.fit(train_x,train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "af124bcd91095e8784b1f9cedbc9a2ecc10008dd"
   },
   "outputs": [],
   "source": [
    "#Catboost模型训练\n",
    "# parameters = {'learning_rate': [0.01,0.02,0.1,0.5,1],\n",
    "#         'l2_leaf_reg':[0.01,0.1,0.5,1],\n",
    "#         'depth':[2,3,4,5,6,7]\n",
    "# }\n",
    "# # m.best_params_{'depth': 2, 'l2_leaf_reg': 1, 'learningf_rate': 0.01}\n",
    "# cat = CatBoostRegressor(iterations=1000,loss_function='MAE')\n",
    "# # cat.fit(X,Y.values.flatten())\n",
    "# m = GridSearchCV(cat, param_grid=parameters, cv=3)\n",
    "# m.fit(X, Y.values.flatten(), silent=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "5e8d17472037bae1de7dd55d1a2a6f1ce6eb06e2"
   },
   "outputs": [],
   "source": [
    "# pred = clf.predict(X_test_sc)\n",
    "# mean_absolute_error(y_test, pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "ba36892840913bac0d342b6ebaaa8de8e809b7e4"
   },
   "outputs": [],
   "source": [
    "# 初始化测试数据\n",
    "submission = pd.read_csv('sample_submission.csv', index_col='seg_id')\n",
    "X_testx = pd.DataFrame(columns=X.columns, dtype=np.float64, index=submission.index)\n",
    "X_testx.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "db2068e0618bfe0e2f59624e746769719b7709ba"
   },
   "outputs": [],
   "source": [
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 傅里叶变换在信号数据中有极大的影响力--np.fft.fft()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "a2354f6435fca49b7e233ad3068b552a7c1e3337"
   },
   "outputs": [],
   "source": [
    "# 添加测试集的特征\n",
    "for seg_id in tqdm(X_testx.index):\n",
    "    seg = pd.read_csv('/kaggle/input/LANL-Earthquake-Prediction/test/' + seg_id + '.csv')\n",
    "    x = seg['acoustic_data'].values\n",
    "    x1 = seg['acoustic_data']\n",
    "    zc = np.fft.fft(x)###傅里叶变换\n",
    "    \n",
    "    realFFT = np.real(zc)\n",
    "    imagFFT = np.imag(zc)\n",
    "    X_testx.loc[seg_id, 'Rmean'] = realFFT.mean()\n",
    "    X_testx.loc[seg_id, 'Rstd'] = realFFT.std()\n",
    "    X_testx.loc[seg_id, 'Rmax'] = realFFT.max()\n",
    "    X_testx.loc[seg_id, 'Rmin'] = realFFT.min()\n",
    "    X_testx.loc[seg_id, 'Imean'] = imagFFT.mean()\n",
    "    X_testx.loc[seg_id, 'Istd'] = imagFFT.std()\n",
    "    X_testx.loc[seg_id, 'Imax'] = imagFFT.max()\n",
    "    X_testx.loc[seg_id, 'Imin'] = imagFFT.min()\n",
    "    X_testx.loc[seg_id, 'Rmean_last_5000'] = realFFT[-5000:].mean()\n",
    "    X_testx.loc[seg_id, 'Rstd__last_5000'] = realFFT[-5000:].std()\n",
    "    X_testx.loc[seg_id, 'Rmax_last_5000'] = realFFT[-5000:].max()\n",
    "    X_testx.loc[seg_id, 'Rmin_last_5000'] = realFFT[-5000:].min()\n",
    "    X_testx.loc[seg_id, 'Rmean_last_15000'] = realFFT[-15000:].mean()\n",
    "    X_testx.loc[seg_id, 'Rstd_last_15000'] = realFFT[-15000:].std()\n",
    "    X_testx.loc[seg_id, 'Rmax_last_15000'] = realFFT[-15000:].max()\n",
    "    X_testx.loc[seg_id, 'Rmin_last_15000'] = realFFT[-15000:].min()\n",
    "    \n",
    "    \n",
    "    \n",
    "    X_testx.loc[seg_id, 'mean'] = x.mean()\n",
    "    X_testx.loc[seg_id, 'stdev'] = x.std()\n",
    "    X_testx.loc[seg_id, 'variance'] = np.var(x)\n",
    "    X_testx.loc[seg_id, 'max'] = x.max()\n",
    "    X_testx.loc[seg_id, 'min'] = x.min()\n",
    "    X_testx.loc[seg_id, 'max-min-diff'] = x.max()-x.min()\n",
    "    X_testx.loc[seg_id, 'max-mean-diff'] = x.max()-x.mean()\n",
    "    X_testx.loc[seg_id, 'mean-change-abs'] = np.mean(np.diff(x))\n",
    "    X_testx.loc[seg_id, 'abs-min'] = np.abs(x).min()\n",
    "    X_testx.loc[seg_id, 'abs-max'] = np.abs(x).max()\n",
    "    X_testx.loc[seg_id, 'abs_mean'] = np.abs(x).mean()\n",
    "    X_testx.loc[seg_id, 'abs_std'] = np.abs(x).std()\n",
    "    X_testx.loc[seg_id, 'std-first-50000'] = x[:50000].std()\n",
    "    X_testx.loc[seg_id, 'std-last-50000'] = x[-50000:].std()\n",
    "    X_testx.loc[seg_id, 'mean-first-50000'] = x[:50000].min()\n",
    "    X_testx.loc[seg_id, 'mean-last-50000'] = x[-50000:].mean()\n",
    "    X_testx.loc[seg_id, 'max-first-50000'] = x[:50000].max()\n",
    "    X_testx.loc[seg_id, 'max-last-50000'] = x[-50000:].max()\n",
    "    X_testx.loc[seg_id, 'min-first-50000'] = x[:50000].min()\n",
    "    X_testx.loc[seg_id, 'min-last-50000'] = x[-50000:].min()\n",
    "    X_testx.loc[seg_id, 'q01'] = np.quantile(x, 0.01)\n",
    "    X_testx.loc[seg_id, 'q05'] = np.quantile(x, 0.05)\n",
    "    X_testx.loc[seg_id, 'q10'] = np.quantile(x, 0.10)\n",
    "    X_testx.loc[seg_id, 'q95'] = np.quantile(x, 0.95)\n",
    "    X_testx.loc[seg_id, 'q99'] = np.quantile(x, 0.99)\n",
    "    \n",
    "    X_testx.loc[seg_id, 'trend'] = add_trend_feature(x)\n",
    "    X_testx.loc[seg_id, 'abs_trend'] = add_trend_feature(x, abs_values=True)\n",
    "    \n",
    "    X_testx.loc[seg_id, 'classic_sta_lta1_mean'] = classic_sta_lta(x, 500, 10000).mean()\n",
    "    X_testx.loc[seg_id, 'classic_sta_lta2_mean'] = classic_sta_lta(x, 5000, 100000).mean()\n",
    "    X_testx.loc[seg_id, 'classic_sta_lta3_mean'] = classic_sta_lta(x, 3333, 6666).mean()\n",
    "    X_testx.loc[seg_id, 'classic_sta_lta4_mean'] = classic_sta_lta(x, 10000, 25000).mean()\n",
    "    \n",
    "    for w in [10, 50, 100, 1000]:\n",
    "        x_roll_std = x1.rolling(w).std().dropna().values\n",
    "        x_roll_mean = x1.rolling(w).mean().dropna().values\n",
    "        x_roll_abs_mean = x1.abs().rolling(w).mean().dropna().values\n",
    "        \n",
    "        X_testx.loc[seg_id, 'ave_roll_std_' + str(w)] = x_roll_std.mean()\n",
    "        X_testx.loc[seg_id, 'std_roll_std_' + str(w)] = x_roll_std.std()\n",
    "        X_testx.loc[seg_id, 'max_roll_std_' + str(w)] = x_roll_std.max()\n",
    "        X_testx.loc[seg_id, 'min_roll_std_' + str(w)] = x_roll_std.min()\n",
    "        X_testx.loc[seg_id, 'q01_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.01)\n",
    "        X_testx.loc[seg_id, 'q05_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.05)\n",
    "        X_testx.loc[seg_id, 'q10_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.10)\n",
    "        X_testx.loc[seg_id, 'q95_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.95)\n",
    "        X_testx.loc[seg_id, 'q99_roll_std_' + str(w)] = np.quantile(x_roll_std, 0.99)\n",
    "        \n",
    "        X_testx.loc[seg_id, 'ave_roll_mean_' + str(w)] = x_roll_mean.mean()\n",
    "        X_testx.loc[seg_id, 'std_roll_mean_' + str(w)] = x_roll_mean.std()\n",
    "        X_testx.loc[seg_id, 'max_roll_mean_' + str(w)] = x_roll_mean.max()\n",
    "        X_testx.loc[seg_id, 'min_roll_mean_' + str(w)] = x_roll_mean.min()\n",
    "        X_testx.loc[seg_id, 'q01_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.01)\n",
    "        X_testx.loc[seg_id, 'q05_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.05)\n",
    "        X_testx.loc[seg_id, 'q95_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.95)\n",
    "        X_testx.loc[seg_id, 'q99_roll_mean_' + str(w)] = np.quantile(x_roll_mean, 0.99)\n",
    "        \n",
    "        X_testx.loc[seg_id, 'ave_roll_abs_mean_' + str(w)] = x_roll_abs_mean.mean()\n",
    "        X_testx.loc[seg_id, 'std_roll_abs_mean_' + str(w)] = x_roll_abs_mean.std()\n",
    "        X_testx.loc[seg_id, 'max_roll_abs_mean_' + str(w)] = x_roll_abs_mean.max()\n",
    "        X_testx.loc[seg_id, 'min_roll_abs_mean_' + str(w)] = x_roll_abs_mean.min()\n",
    "        X_testx.loc[seg_id, 'q01_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.01)\n",
    "        X_testx.loc[seg_id, 'q05_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.05)\n",
    "        X_testx.loc[seg_id, 'q95_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.95)\n",
    "        X_testx.loc[seg_id, 'q99_roll_abs_mean_' + str(w)] = np.quantile(x_roll_abs_mean, 0.99)\n",
    "X_test_scaled = pd.DataFrame(scaler.transform(X_testx), columns=X_testx.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "2c4a1442c1a2ddf36fcaabf60d5db3f6d4b831f5"
   },
   "outputs": [],
   "source": [
    "X_test_scaled.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "cff39572604248c4e66d1285e819fe60bd884f46"
   },
   "outputs": [],
   "source": [
    "#三种模型的预测\n",
    "pred_test1 = gbdt.predict(X_test_scaled)\n",
    "pred_test2 = xgb_fit.predict(X_test_scaled.values)\n",
    "pred_test3 = lgb_fit.predict(X_test_scaled)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_uuid": "e3372f59ff8ad640404578b829adb39acdb79f13"
   },
   "source": [
    "保存为需求规格文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "79d24e5e9778f1a8bc66e0ab4f7950fafb79c740"
   },
   "outputs": [],
   "source": [
    "submission['time_to_failure'] = pred_test1\n",
    "submission.to_csv('submission10.csv',index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "1abb16b666f602b379b47cb40f6a843cc3b55d54"
   },
   "outputs": [],
   "source": [
    "submission['time_to_failure'] = pred_test2\n",
    "submission.to_csv('submission11.csv',index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "_uuid": "293e24e8fd9504b902db1cdb858b85b139ae0396"
   },
   "outputs": [],
   "source": [
    "submission['time_to_failure'] = pred_test3\n",
    "submission.to_csv('submission12.csv',index=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
